## String functions on Columns

### EDA   for columns

In [1]:
import pandas as pd
import numpy as np

In [3]:
df=pd.DataFrame({
'First Name':['Sahil','Sonia','Sourav','Vishal'],
'Age':[10,20,30,40],
'Gender':['M','F','M','M'],
'City':['J','K','L','P'],
'Place of Work':[True,False,False,True],
}
)
df

Unnamed: 0,First Name,Age,Gender,City,Place of Work
0,Sahil,10,M,J,True
1,Sonia,20,F,K,False
2,Sourav,30,M,L,False
3,Vishal,40,M,P,True


#### Get columns as list

In [4]:
df.columns.tolist()

['First Name', 'Age', 'Gender', 'City', 'Place of Work']

#### Convert column names to series | df:

In [9]:
df.columns.to_series()

First Name          First Name
Age                        Age
Gender                  Gender
City                      City
Place of Work    Place of Work
dtype: object

In [11]:
df.columns.to_frame()

Unnamed: 0,0
First Name,First Name
Age,Age
Gender,Gender
City,City
Place of Work,Place of Work


#### Check if specific column is there or not

In [15]:
df.columns.str.contains('Name') 

array([ True, False, False, False, False])

#### Check if any duplicate column is there

In [17]:
df.columns.duplicated()

array([False, False, False, False, False])

#### Check methods/attributes of String

In [20]:
dir(df.columns.str)[0:5]

['__annotations__', '__class__', '__delattr__', '__dict__', '__dir__']

#### Make column names to lower case

In [22]:
df.columns.str.lower()

Index(['first name', 'age', 'gender', 'city', 'place of work'], dtype='object')

#### Make column names to Upper case

In [23]:
df.columns.str.upper()

Index(['FIRST NAME', 'AGE', 'GENDER', 'CITY', 'PLACE OF WORK'], dtype='object')

#### Make column names to Title case

In [26]:
df.columns.str.title()  # Camel Case

Index(['First Name', 'Age', 'Gender', 'City', 'Place Of Work'], dtype='object')

#### Make column names to Capitalize

In [28]:
df.columns.str.capitalize() # Only first letter big

Index(['First name', 'Age', 'Gender', 'City', 'Place of work'], dtype='object')

#### Replace empty spaces with underscores

In [29]:
df.columns.str.replace(' ','-')

Index(['First-Name', 'Age', 'Gender', 'City', 'Place-of-Work'], dtype='object')

#### Rename columns

In [30]:
df.rename(columns={'oldname':'newname'},inplace=True)

#### Check total number of columns

In [32]:
len(df.columns)

5

#### Select particular columns

In [33]:
df.columns.values[0:4]

array(['First Name', 'Age', 'Gender', 'City'], dtype=object)

#### Get 2nd column and rename it

In [34]:
df.columns.values[2]='DOB'

In [35]:
df

Unnamed: 0,First Name,Age,DOB,City,Place of Work
0,Sahil,10,M,J,True
1,Sonia,20,F,K,False
2,Sourav,30,M,L,False
3,Vishal,40,M,P,True


#### Select all columns except one

In [40]:
df.columns[df.columns!= 'DOB']

Index(['First Name', 'Age', 'City', 'Place of Work'], dtype='object')

In [41]:
df

Unnamed: 0,First Name,Age,DOB,City,Place of Work
0,Sahil,10,M,J,True
1,Sonia,20,F,K,False
2,Sourav,30,M,L,False
3,Vishal,40,M,P,True


#### Select all columns except multiple

In [43]:
#?
df.loc[:,-df.columns.isin(['DOB','City']).columns]

AttributeError: 'numpy.ndarray' object has no attribute 'columns'

#### Select column names that begins with particular word

In [48]:
df.columns.str.startswith('First')
# Gives array of booleans

array([ True, False, False, False, False])

#### Select group of column names

In [46]:
df.columns.values[[0,1,2]]

array(['First Name', 'Age', 'DOB'], dtype=object)

In [47]:
df.columns[0:3]

Index(['First Name', 'Age', 'DOB'], dtype='object')